{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "独热编码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 1, 0, 0])"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "from sklearn.preprocessing import LabelBinarizer, OneHotEncoder\n",
    "\n",
    "X = np.array([\n",
    "    [1, '青绿', '蜷缩', '浊响', '清晰', '凹陷', '硬滑', 0.697, 0.460],\n",
    "    [2, '乌黑', '蜷缩', '沉闷', '清晰', '凹陷', '硬滑', 0.774, 0.376],\n",
    "    [3, '乌黑', '稍蜷', '沉闷', '稍糊', '稍凹', '硬滑', 0.666, 0.091],\n",
    "    [4, '浅白', '硬挺', '清脆', '模糊', '平坦', '硬滑', 0.245, 0.057],\n",
    "])\n",
    "y = np.array(['是', '是', '否', '否'])\n",
    "LabelBinarizer().fit_transform(y).squeeze() # 类别标记二值化\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0., 0., 1., 0., 0., 1., 0., 1., 0., 0., 1., 0., 1., 0., 0., 1.],\n",
       "       [1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 1., 0., 1., 0., 0., 1.],\n",
       "       [1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 1., 0., 0., 1., 1.],\n",
       "       [0., 1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 1., 0., 1.]])"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "enc = OneHotEncoder()\n",
    "enc.fit_transform(X[:, 1:7]).toarray()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['x0_乌黑', 'x0_浅白', 'x0_青绿', 'x1_硬挺', 'x1_稍蜷', 'x1_蜷缩', 'x2_沉闷',\n",
       "       'x2_浊响', 'x2_清脆', 'x3_模糊', 'x3_清晰', 'x3_稍糊', 'x4_凹陷', 'x4_平坦',\n",
       "       'x4_稍凹', 'x5_硬滑'], dtype=object)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "enc.get_feature_names_out()"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a"
  },
  "kernelspec": {
   "display_name": "Python 3.9.7 64-bit",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
